By Amin Sabbagh
Student ID: 18055392
EthOS Reference Number: 64001
Intro | Linear Regression Models | Further Insight | Data Preprocessing Intro | Data Preprocessing Continuous | Data Preprocessing Insights
Pandas: Used to load data from Excel into DataFrames.
NumPy: Used for numerical computing and array operations.
Matplotlib.pyplot: Used for creating static data visualizations (here, scatter plots with fitted regression lines) in Python.
Plotly Express: Used to create interactive data visualizations.
SciPy.stats: Provides functions for statistical computations and hypothesis testing.
Seaborn: A data visualization library for drawing attractive statistical graphics.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
from scipy import stats
import seaborn as sns
# Worksheet and workbook that hold the cost-of-living data.
sheet_name = 'Sheet1'
file_name = "COL.xlsx"

# Load only spreadsheet columns C through I of the chosen sheet.
df = pd.read_excel(
    file_name,
    sheet_name=sheet_name,
    usecols='C, D, E, F, G, H, I',
)
# Bare expression: presumably kept so the notebook renders the DataFrame.
df
| | CPI | Food Inflation | Resturants Inflation | Rental Price | House Price | Motor Fuel Price | Gas inflation |
|---|---|---|---|---|---|---|---|
| 0 | 0.6 | -2.6 | 1.8 | 2.6 | 7.8 | -7.3 | -6.0 |
| 1 | 0.6 | -2.2 | 1.7 | 2.6 | 7.7 | -7.3 | -6.0 |
| 2 | 0.8 | -2.8 | 1.9 | 2.6 | 8.4 | -9.2 | -6.0 |
| 3 | 0.7 | -2.5 | 2.1 | 2.6 | 7.9 | -7.5 | -7.3 |
| 4 | 0.7 | -2.8 | 2.2 | 2.5 | 8.0 | -6.8 | -6.7 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 91 | 6.3 | 13.6 | 8.8 | 5.6 | -0.4 | -16.4 | 1.7 |
| 92 | 6.3 | 12.2 | 9.1 | 5.7 | -1.4 | -9.7 | 1.7 |
| 93 | 4.7 | 10.1 | 8.8 | 6.1 | -1.3 | -7.6 | -31.0 |
| 94 | 4.2 | 9.2 | 8.2 | 6.2 | -2.3 | -10.6 | -31.0 |
| 95 | 4.2 | 8.0 | 7.7 | 6.2 | -1.4 | -10.8 | -31.0 |
96 rows × 7 columns
# Pull each column of df out as its own NumPy array. The six predictor
# columns come first; the last entry (CPI) is bound to y, the regression
# target used by the models below.
food, resturants, rent, house, fuel, gas, y = (
    np.array(df[column_name])
    for column_name in (
        'Food Inflation',
        'Resturants Inflation',
        'Rental Price',
        'House Price',
        'Motor Fuel Price',
        'Gas inflation',
        'CPI',
    )
)
# Least-squares fit with food inflation as the predictor (x) and y as the response.
slope_food, intercept_food, r_food, p_food, std_err_food = stats.linregress(food, y)


def foodFunc(x):
    """Return the value of the fitted food-inflation regression line at x."""
    return slope_food * x + intercept_food


# Fitted line evaluated at every observed food-inflation value.
foodModel = list(map(foodFunc, food))

plt.scatter(food, y)
plt.plot(food, foodModel)
# food is plotted on the x-axis and y (the CPI column) on the y-axis, so the
# labels must follow that order; they were previously swapped.
plt.xlabel('Food Inflation')
plt.ylabel('Consumer Price Inflation')
plt.title('Linear Regression Model for Food Inflation')
plt.show()
# Least-squares fit with restaurant inflation as the predictor (x) and y as the response.
slope_resturant, intercept_resturant, r_resturant, p_resturant, std_err_resturant = stats.linregress(resturants, y)


def resturantFunc(x):
    """Return the value of the fitted restaurant-inflation regression line at x."""
    return slope_resturant * x + intercept_resturant


# Fitted line evaluated at every observed restaurant-inflation value.
resturantModel = list(map(resturantFunc, resturants))

plt.scatter(resturants, y)
plt.plot(resturants, resturantModel)
# resturants is plotted on the x-axis and y (the CPI column) on the y-axis.
# The previous labels were swapped and the y label was a copy-paste leftover
# that read 'Food Inflation'.
plt.xlabel('Restaurants Inflation')
plt.ylabel('Consumer Price Inflation')
plt.title('Linear Regression Model for Resturants Inflation')
plt.show()
Importance of Food Inflation:
Monitoring Food Price Trends:
Predictive Power of Regression Model:
Policy Implications:
Curvilinear Relationship:
Policy Implications:
# Least-squares fit with rental price as the predictor (x) and y as the response.
slope_rent, intercept_rent, r_rent, p_rent, std_err_rent = stats.linregress(rent, y)


def rentFunc(x):
    """Return the value of the fitted rental-price regression line at x."""
    return slope_rent * x + intercept_rent


# Fitted line evaluated at every observed rental-price value.
rentModel = list(map(rentFunc, rent))

plt.scatter(rent, y)
plt.plot(rent, rentModel)
# rent is plotted on the x-axis and y (the CPI column) on the y-axis, so the
# labels must follow that order; they were previously swapped.
plt.xlabel('Rent Prices')
plt.ylabel('Consumer Price Inflation')
plt.title('Linear Regression Model for Rent Prices')
plt.show()
# Least-squares fit with house price as the predictor (x) and y as the response.
slope_house, intercept_house, r_house, p_house, std_err_house = stats.linregress(house, y)


def houseFunc(x):
    """Return the value of the fitted house-price regression line at x."""
    return slope_house * x + intercept_house


# Fitted line evaluated at every observed house-price value.
houseModel = list(map(houseFunc, house))

plt.scatter(house, y)
plt.plot(house, houseModel)
# house is plotted on the x-axis and y (the CPI column) on the y-axis, so the
# labels must follow that order; they were previously swapped.
plt.xlabel('House Prices')
plt.ylabel('Consumer Price Inflation')
plt.title('Linear Regression Model for House Prices')
plt.show()
# Least-squares fit with motor fuel price as the predictor (x) and y as the response.
slope_fuel, intercept_fuel, r_fuel, p_fuel, std_err_fuel = stats.linregress(fuel, y)


def fuelFunc(x):
    """Return the value of the fitted motor-fuel regression line at x."""
    return slope_fuel * x + intercept_fuel


# Fitted line evaluated at every observed motor-fuel value.
fuelModel = list(map(fuelFunc, fuel))

plt.scatter(fuel, y)
plt.plot(fuel, fuelModel)
# fuel is plotted on the x-axis and y (the CPI column) on the y-axis, so the
# labels must follow that order; they were previously swapped.
plt.xlabel('fuel Inflation')
plt.ylabel('Consumer Price Inflation')
plt.title('Linear Regression Model for fuel Inflation')
plt.show()
# Least-squares fit with gas inflation as the predictor (x) and y as the response.
slope_gas, intercept_gas, r_gas, p_gas, std_err_gas = stats.linregress(gas, y)


def gasFunc(x):
    """Return the value of the fitted gas-inflation regression line at x."""
    return slope_gas * x + intercept_gas


# Fitted line evaluated at every observed gas-inflation value.
gasModel = list(map(gasFunc, gas))

plt.scatter(gas, y)
plt.plot(gas, gasModel)
# gas is plotted on the x-axis and y (the CPI column) on the y-axis, so the
# labels must follow that order; they were previously swapped.
plt.xlabel('gas Inflation')
plt.ylabel('Consumer Price Inflation')
plt.title('Linear Regression Model for gas Inflation')
plt.show()
# Wide canvas so the annotated cells of the heatmap stay readable.
plt.figure(figsize=(16, 6))

# Pairwise correlations between the columns of df, drawn on a fixed
# [-1, 1] colour scale with each coefficient printed in its cell.
correlation_matrix = df.corr()
heatmap = sns.heatmap(
    correlation_matrix,
    annot=True,
    cmap='BrBG',
    vmin=-1,
    vmax=1,
)
# The trailing semicolon is kept from the original cell (presumably to
# suppress the notebook echo of the returned Text object).
heatmap.set_title(
    'Correlation of Cost Of Living',
    pad=12,
    fontdict={'fontsize': 18},
);
# Coefficient of determination (R^2) for each single-variable regression:
# the square of the r value bound from the matching linregress call.
# Order: food, restaurant, rent, house, fuel, gas.
COD_food, COD_resturant, COD_rent, COD_house, COD_fuel, COD_gas = (
    r_value * r_value
    for r_value in (r_food, r_resturant, r_rent, r_house, r_fuel, r_gas)
)
# Keep each display label next to its R^2 value so the two lists built
# below cannot drift out of order.
r_squared_by_category = {
    'Food': COD_food,
    'Restaurant': COD_resturant,
    'Gas': COD_gas,
    'Rent': COD_rent,
    'Fuel': COD_fuel,
    'House': COD_house,
}
categories = list(r_squared_by_category)
r_squared_values = list(r_squared_by_category.values())
data = pd.DataFrame({'Category': categories, 'R-squared': r_squared_values})

# Interactive bar chart: one bar per category, shaded by its R^2 value.
fig = px.bar(
    data,
    x='Category',
    y='R-squared',
    color='R-squared',
    color_continuous_scale='Blues',
    hover_data={'R-squared': ':.2f'},
    labels={'R-squared': 'R-squared'},
    title='Coefficients of determination Values for Cost of Living Crisis',
)
# Pin the y-axis to the range [0, 1].
fig.update_layout(yaxis_range=[0, 1])
fig.show()